Variables:
Read 'aggregated_data.csv' into a DataFrame.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu as MWU
import datetime as dt
import seaborn as sns
from sklearn.neighbors import LocalOutlierFactor
# Show every column when a frame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Load the aggregated start-up attempts and preview the first five rows.
df = pd.read_csv('aggregated_data.csv')
df.head()
| Startup number | Burner number | Attempt to start | Attempt beginning | Attempt end | Beginning of analyzed period | End of analyzed period | Steam valve opened | Steam valve closed | Valve opening duration | Mean value of burner brightness (flame sensor) | Flame goes out | Mean steam pressure | Drop of brigthness (flame sensor) | Drop of steam pressure | Downtime before startup | Attempt outcome | Burner warmup time by steam[h] | Mean temperature in the warmup time | Mean steam temperature for an hour before start | Mean steam pressure for an hour before start | Mean fuel oil pressure | Ratio of mean pressures | Mean steam pressure in ring 1 | Mean fuel oil pressure in ring 1 | Ratio of pressure in ring 1 | Startup order (burner numbers) | Startup order - first 4 | Fuel oil temperature before startup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 1.0 | 1.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:27:12 | 2016-01-19 14:44:44 | 0 days 05:17:32.000000000 | 97.308594 | 2016-01-19 14:44:43 | 0.759057 | 0.430021 | 0.541096 | 4 days 13:13:00.000000000 | succeed | 0.856111 | 260.191696 | 260.191696 | 0.759217 | 0.385376 | 1.970068 | 0.880000 | 0.460000 | 1.913044 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.300429 |
| 1 | 1.0 | 2.0 | 1.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:28:16 | 2016-01-19 09:28:36 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.506667 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 0.873611 | 260.588541 | 260.588541 | 0.600000 | 0.292222 | 2.053232 | 1.000000 | 0.480000 | 2.083333 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 111.005963 |
| 2 | 1.0 | 2.0 | 2.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:57:12 | 2016-01-19 09:57:31 | 0 days 00:00:19.000000000 | 0.000000 | NaN | 0.488000 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 1.355833 | 267.639146 | 267.639146 | 0.568750 | 0.305000 | 1.864754 | 0.940000 | 0.480000 | 1.958333 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.798885 |
| 3 | 1.0 | 2.0 | 3.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 10:38:21 | 2016-01-19 10:38:41 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.498095 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 2.041667 | 272.374878 | 272.374878 | 0.588889 | 0.296667 | 1.985019 | 0.934286 | 0.450000 | 2.076190 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 111.613316 |
| 4 | 1.0 | 2.0 | 4.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 10:39:37 | 2016-01-19 10:39:57 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.504762 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 2.062778 | 272.471120 | 272.471120 | 0.584444 | 0.303333 | 1.926740 | 0.980000 | 0.485714 | 2.017647 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.080826 |
| Startup number | Burner number | Attempt to start | Attempt beginning | Attempt end | Beginning of analyzed period | End of analyzed period | Steam valve opened | Steam valve closed | Valve opening duration | Mean value of burner brightness (flame sensor) | Flame goes out | Mean steam pressure | Drop of brigthness (flame sensor) | Drop of steam pressure | Downtime before startup | Attempt outcome | Burner warmup time by steam[h] | Mean temperature in the warmup time | Mean steam temperature for an hour before start | Mean steam pressure for an hour before start | Mean fuel oil pressure | Ratio of mean pressures | Mean steam pressure in ring 1 | Mean fuel oil pressure in ring 1 | Ratio of pressure in ring 1 | Startup order (burner numbers) | Startup order - first 4 | Fuel oil temperature before startup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 1.0 | 1.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:27:12 | 2016-01-19 14:44:44 | 0 days 05:17:32.000000000 | 97.308594 | 2016-01-19 14:44:43 | 0.759057 | 0.430021 | 0.541096 | 4 days 13:13:00.000000000 | succeed | 0.856111 | 260.191696 | 260.191696 | 0.759217 | 0.385376 | 1.970068 | 0.880000 | 0.460000 | 1.913044 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.300429 |
| 1 | 1.0 | 2.0 | 1.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:28:16 | 2016-01-19 09:28:36 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.506667 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 0.873611 | 260.588541 | 260.588541 | 0.600000 | 0.292222 | 2.053232 | 1.000000 | 0.480000 | 2.083333 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 111.005963 |
| 2 | 1.0 | 2.0 | 2.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 09:57:12 | 2016-01-19 09:57:31 | 0 days 00:00:19.000000000 | 0.000000 | NaN | 0.488000 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 1.355833 | 267.639146 | 267.639146 | 0.568750 | 0.305000 | 1.864754 | 0.940000 | 0.480000 | 1.958333 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.798885 |
| 3 | 1.0 | 2.0 | 3.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 10:38:21 | 2016-01-19 10:38:41 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.498095 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 2.041667 | 272.374878 | 272.374878 | 0.588889 | 0.296667 | 1.985019 | 0.934286 | 0.450000 | 2.076190 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 111.613316 |
| 4 | 1.0 | 2.0 | 4.0 | 2016-01-19 12:36:00 | 2016-01-14 23:23:00 | 2016-01-18 21:36:00 | 2016-01-19 15:36:00 | 2016-01-19 10:39:37 | 2016-01-19 10:39:57 | 0 days 00:00:20.000000000 | 0.000000 | NaN | 0.504762 | NaN | NaN | 4 days 13:13:00.000000000 | no ignition | 2.062778 | 272.471120 | 272.471120 | 0.584444 | 0.303333 | 1.926740 | 0.980000 | 0.485714 | 2.017647 | 3, 1, 4, 2, 7, 5, 8, 6, 4, 3, 7, 2, 3, 4, 3, 4... | 3, 1, 4, 2 | 110.080826 |
# Work on a copy so the raw frame read from the CSV stays untouched.
sub1 = df.copy()

# Columns holding timestamps; parse each one into datetime64.
timestamp_columns = (
    'Attempt beginning',
    'Attempt end',
    'Beginning of analyzed period',
    'End of analyzed period',
    'Steam valve opened',
    'Steam valve closed',
    'Flame goes out',
)
for timestamp_column in timestamp_columns:
    sub1[timestamp_column] = pd.to_datetime(sub1[timestamp_column])

# Durations are parsed into timedelta64.
# NOTE(review): 'Valve opening duration' is not converted here and stays an
# object column — confirm whether that is intentional.
sub1['Downtime before startup'] = pd.to_timedelta(sub1['Downtime before startup'])

# Print the resulting dtypes and non-null counts.
sub1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1158 entries, 0 to 1157 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Startup number 1158 non-null float64 1 Burner number 1158 non-null float64 2 Attempt to start 1158 non-null float64 3 Attempt beginning 1158 non-null datetime64[ns] 4 Attempt end 1158 non-null datetime64[ns] 5 Beginning of analyzed period 1158 non-null datetime64[ns] 6 End of analyzed period 1158 non-null datetime64[ns] 7 Steam valve opened 1158 non-null datetime64[ns] 8 Steam valve closed 1158 non-null datetime64[ns] 9 Valve opening duration 1158 non-null object 10 Mean value of burner brightness (flame sensor) 1158 non-null float64 11 Flame goes out 715 non-null datetime64[ns] 12 Mean steam pressure 1158 non-null float64 13 Drop of brigthness (flame sensor) 715 non-null float64 14 Drop of steam pressure 715 non-null float64 15 Downtime before startup 1158 non-null timedelta64[ns] 16 Attempt outcome 1158 non-null object 17 Burner warmup time by steam[h] 1158 non-null float64 18 Mean temperature in the warmup time 1158 non-null float64 19 Mean steam temperature for an hour before start 1158 non-null float64 20 Mean steam pressure for an hour before start 1117 non-null float64 21 Mean fuel oil pressure 1117 non-null float64 22 Ratio of mean pressures 1117 non-null float64 23 Mean steam pressure in ring 1 1158 non-null float64 24 Mean fuel oil pressure in ring 1 1158 non-null float64 25 Ratio of pressure in ring 1 1158 non-null float64 26 Startup order (burner numbers) 1158 non-null object 27 Startup order - first 4 1158 non-null object 28 Fuel oil temperature before startup 1158 non-null float64 dtypes: datetime64[ns](7), float64(17), object(4), timedelta64[ns](1) memory usage: 262.5+ KB
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1158 entries, 0 to 1157 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Startup number 1158 non-null float64 1 Burner number 1158 non-null float64 2 Attempt to start 1158 non-null float64 3 Attempt beginning 1158 non-null datetime64[ns] 4 Attempt end 1158 non-null datetime64[ns] 5 Beginning of analyzed period 1158 non-null datetime64[ns] 6 End of analyzed period 1158 non-null datetime64[ns] 7 Steam valve opened 1158 non-null datetime64[ns] 8 Steam valve closed 1158 non-null datetime64[ns] 9 Valve opening duration 1158 non-null object 10 Mean value of burner brightness (flame sensor) 1158 non-null float64 11 Flame goes out 715 non-null datetime64[ns] 12 Mean steam pressure 1158 non-null float64 13 Drop of brigthness (flame sensor) 715 non-null float64 14 Drop of steam pressure 715 non-null float64 15 Downtime before startup 1158 non-null timedelta64[ns] 16 Attempt outcome 1158 non-null object 17 Burner warmup time by steam[h] 1158 non-null float64 18 Mean temperature in the warmup time 1158 non-null float64 19 Mean steam temperature for an hour before start 1158 non-null float64 20 Mean steam pressure for an hour before start 1117 non-null float64 21 Mean fuel oil pressure 1117 non-null float64 22 Ratio of mean pressures 1117 non-null float64 23 Mean steam pressure in ring 1 1158 non-null float64 24 Mean fuel oil pressure in ring 1 1158 non-null float64 25 Ratio of pressure in ring 1 1158 non-null float64 26 Startup order (burner numbers) 1158 non-null object 27 Startup order - first 4 1158 non-null object 28 Fuel oil temperature before startup 1158 non-null float64 dtypes: datetime64[ns](7), float64(17), object(4), timedelta64[ns](1) memory usage: 262.5+ KB
# Bar chart of attempt outcomes, one figure per burner (burners 1..8).
sub2 = sub1.copy()

# Colour is keyed by outcome label rather than by bar position, so a burner
# that is missing one of the outcomes still gets correctly coloured bars.
outcome_colors = {
    'failed': 'tab:red',
    'no ignition': 'tab:grey',
    'succeed': 'tab:green',
}

for i in range(1, 9):
    # Number of attempts per outcome for this burner; groupby sorts the
    # outcome labels, reset_index turns them back into a regular column.
    sub3 = sub2[sub2['Burner number'] == i]
    sub3 = sub3.groupby('Attempt outcome').count().reset_index()

    # Any outcome label not listed above falls back to a neutral colour.
    bar_colors = [outcome_colors.get(outcome, 'tab:blue')
                  for outcome in sub3['Attempt outcome']]

    plt.bar(sub3.index, sub3['Startup number'], color=bar_colors)
    plt.xticks(sub3.index, sub3['Attempt outcome'])
    plt.ylabel('# of times')
    plt.title('Burner number {}'.format(i))
    plt.show()
# Scatter plot of mean steam pressure vs. mean flame brightness, one figure
# per burner (burners 1..8), coloured by attempt outcome.
# The loop no longer iterates over `sub3` left behind by an earlier cell, so
# this cell only needs `sub2` to exist.
for i in range(1, 9):
    # Keep this burner's attempts, leaving out the 'no ignition' ones.
    sub3 = sub2[sub2['Burner number'] == i]
    sub3 = sub3[sub3['Attempt outcome'] != 'no ignition']

    sns.scatterplot(x='Mean steam pressure',
                    y='Mean value of burner brightness (flame sensor)',
                    data=sub3, hue='Attempt outcome', s=150)
    plt.title('Burner number {}'.format(i))
    plt.show()
# Build the numeric table that is later passed to the outlier detector:
# ten process measurements plus the attempt outcome encoded as a number.
lof_feature_columns = [
    'Mean value of burner brightness (flame sensor)',
    'Mean steam pressure',
    'Mean temperature in the warmup time',
    'Mean steam temperature for an hour before start',
    'Mean steam pressure for an hour before start',
    'Mean fuel oil pressure',
    'Ratio of mean pressures',
    'Mean steam pressure in ring 1',
    'Mean fuel oil pressure in ring 1',
    'Ratio of pressure in ring 1',
]

# Numeric code for each outcome that is kept.
outcome_codes = {'succeed': 1, 'failed': 2}

# 'no ignition' attempts are excluded up front.
ignited = sub1['Attempt outcome'] != 'no ignition'

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of sub1.
sub4 = sub1.loc[ignited, lof_feature_columns + ['Attempt outcome']].copy()

# Encode the outcome; a label missing from outcome_codes becomes NaN here and
# is therefore removed by dropna() below instead of reaching the model.
sub4['Attempt outcome'] = sub4['Attempt outcome'].map(outcome_codes)

# Same order as before: renumber rows first, then drop incomplete rows (the
# index may therefore contain gaps).
sub4 = (
    sub4.reset_index(drop=True)
        .dropna(axis=0, how='any')
        .astype({'Attempt outcome': 'int64'})
)
# Fit a Local Outlier Factor model (10 neighbours) on every column of sub4.
# NOTE(review): sub4 still contains the encoded 'Attempt outcome' column and
# no scaling step is visible before the fit — confirm both are intended.
lof = LocalOutlierFactor(n_neighbors=10, contamination='auto', novelty=False).fit(sub4)

# Per-row negative LOF score produced by the fit.
neg_values = lof.negative_outlier_factor_
neg_values

# Copy of the fitted data with the score attached as an extra column.
sub4_results = sub4.assign(negative_lof=neg_values)
# Scatter plot for every unordered pair of sub4 columns, with points coloured
# by their negative LOF score and marker style set by attempt outcome.
# 'Attempt outcome' is already numeric in sub4_results, so no 'no ignition'
# filtering is needed at this point.
sub5 = sub4_results.copy()

pair_columns = list(sub4.columns)
for j, col1 in enumerate(pair_columns):
    # Only columns after col1: each pair is drawn once, never against itself.
    for col2 in pair_columns[j + 1:]:
        sns.scatterplot(x=col1, y=col2, hue='negative_lof', palette="rocket",
                        style='Attempt outcome', data=sub5, s=150)
        plt.title('{} vs {}'.format(col1, col2))

        # Collapse duplicate legend labels and place the legend outside the
        # axes, at the upper-right corner.
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(by_label.values(), by_label.keys(),
                   loc='upper left', bbox_to_anchor=(1, 1))
        plt.show()
# Measurements (plus the outcome label) used for the per-burner boxplots.
burner_boxplot_columns = [
    'Mean value of burner brightness (flame sensor)',
    'Mean steam pressure',
    'Mean temperature in the warmup time',
    'Mean steam temperature for an hour before start',
    'Mean steam pressure for an hour before start',
    'Mean fuel oil pressure',
    'Ratio of mean pressures',
    'Mean steam pressure in ring 1',
    'Mean fuel oil pressure in ring 1',
    'Ratio of pressure in ring 1',
    'Fuel oil temperature before startup',
    'Attempt outcome',
]

# Index the rows by burner number and keep only the columns listed above.
sub6 = sub1.set_index('Burner number')[burner_boxplot_columns]
# Boxplot of every measurement in sub6, split by attempt outcome (y axis) and
# by burner number (hue, taken from the frame's index).
sub7 = sub6[sub6['Attempt outcome'] != 'no ignition']

# Select the measurement columns by name instead of by position, so the loop
# keeps working if the column list changes.
measurement_columns = [col for col in sub6.columns if col != 'Attempt outcome']

for col1 in measurement_columns:
    sns.boxplot(x=col1, y='Attempt outcome', hue=sub7.index, data=sub7)

    # Collapse duplicate legend labels and place the legend outside the axes.
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    plt.legend(by_label.values(), by_label.keys(),
               loc='upper left', bbox_to_anchor=(1, 1))
    plt.show()
# Columns kept for the burner-1 analysis: measurements, outcome and burner id.
burner1_columns = [
    'Mean value of burner brightness (flame sensor)',
    'Mean steam pressure',
    'Mean temperature in the warmup time',
    'Mean steam temperature for an hour before start',
    'Mean steam pressure for an hour before start',
    'Mean fuel oil pressure',
    'Ratio of mean pressures',
    'Mean steam pressure in ring 1',
    'Mean fuel oil pressure in ring 1',
    'Ratio of pressure in ring 1',
    'Fuel oil temperature before startup',
    'Attempt outcome',
    'Burner number',
]

# Restrict to the rows recorded for burner number 1.
is_burner_1 = sub1['Burner number'] == 1
sub8 = sub1.loc[is_burner_1, burner1_columns].copy()
# Boxplot of every burner-1 measurement, split by attempt outcome.
sub9 = sub8[sub8['Attempt outcome'] != 'no ignition']

# Measurement columns are selected by name; the outcome label and the burner
# id are not measurements and are left out.
non_measurement_columns = ('Attempt outcome', 'Burner number')
measurement_columns = [col for col in sub8.columns
                       if col not in non_measurement_columns]

# Fix the category order so that green always belongs to 'succeed' and red to
# 'failed', regardless of which outcome appears first in the data.
outcome_order = ['succeed', 'failed']
outcome_palette = ['tab:green', 'tab:red']

for col1 in measurement_columns:
    sns.boxplot(x=col1, y='Attempt outcome', data=sub9,
                order=outcome_order, palette=outcome_palette)
    plt.show()
# Split the burner-1 rows by outcome label.
outcome_equals = sub8['Attempt outcome'].eq
sub8_success = sub8[outcome_equals('succeed')]
sub8_failed = sub8[outcome_equals('failed')]

# Mann-Whitney U test: 'Mean steam pressure', succeeded vs. failed attempts.
# NOTE(review): one- vs two-sided depends on SciPy's default `alternative`
# for the installed version — confirm which one the reported p-value is.
steam_pressure_success = sub8_success['Mean steam pressure']
steam_pressure_failed = sub8_failed['Mean steam pressure']
U, p = MWU(steam_pressure_success, steam_pressure_failed)
p
0.0011557331251609588
# Mann-Whitney U test: 'Mean steam pressure for an hour before start',
# succeeded vs. failed attempts.
# This column has missing values in the full dataset (1117 of 1158 non-null),
# so NaNs are dropped from both samples before they are ranked.
U, p = MWU(sub8_success['Mean steam pressure for an hour before start'].dropna(),
           sub8_failed['Mean steam pressure for an hour before start'].dropna())
p
4.582836672387912e-07
# Mann-Whitney U test: 'Mean temperature in the warmup time',
# succeeded vs. failed attempts.
warmup_temp_success = sub8_success['Mean temperature in the warmup time']
warmup_temp_failed = sub8_failed['Mean temperature in the warmup time']
U, p = MWU(warmup_temp_success, warmup_temp_failed)
p
0.004810279735137668
# Mann-Whitney U test: 'Fuel oil temperature before startup',
# succeeded vs. failed attempts.
fuel_temp_success = sub8_success['Fuel oil temperature before startup']
fuel_temp_failed = sub8_failed['Fuel oil temperature before startup']
U, p = MWU(fuel_temp_success, fuel_temp_failed)
p
0.44136199389291814
# Mann-Whitney U test: 'Mean fuel oil pressure in ring 1',
# succeeded vs. failed attempts.
ring1_fuel_success = sub8_success['Mean fuel oil pressure in ring 1']
ring1_fuel_failed = sub8_failed['Mean fuel oil pressure in ring 1']
U, p = MWU(ring1_fuel_success, ring1_fuel_failed)
p
0.3258492910241486
# Median of 'Mean steam pressure' over the rows in sub8_success.
sub8_success['Mean steam pressure'].median()
0.7446897327899933
# Median of 'Mean steam pressure' over the rows in sub8_failed.
sub8_failed['Mean steam pressure'].median()
0.7106395065784454
# Median of 'Mean steam pressure for an hour before start' in sub8_success.
sub8_success['Mean steam pressure for an hour before start'].median()
0.7449118196964264
# Median of 'Mean steam pressure for an hour before start' in sub8_failed.
sub8_failed['Mean steam pressure for an hour before start'].median()
0.8035866022109985